跳到主要内容

把图床的图片备份到本地

前言

随着博客写得越来越多,图片一直保存在别人的图床上也让我越来越不放心,所以这里使用 Java 编写一个脚本,把这些图片下载到本地

为了方便管理,下载下来的图片按照与图床链接相同的目录结构存放

https://image.alsritter.icu/images/2021/02/08/NuPfKO3a459cbe03e42c6a.jpg

首先遍历所有 Markdown 文件,把其中的图片链接提取出来,再逐个下载

取得文件地址

全局变量:

// Root directory containing the Markdown notes to scan.
static String rootPath = "C:/Users/alsritter/Desktop/New folder/";
// Markdown files collected by getFilePath.
static List<File> allFilePath = new LinkedList<>();
static List<String> pictureURLPath = new CopyOnWriteArrayList<>(); // Worker threads add to this list concurrently, so a thread-safe List is required

首先取得全部文件的地址

/**
 * Recursively collects every Markdown file (name ending in {@code .md}) below
 * {@code file} into {@code allFilePath}.
 *
 * @param file directory to scan; a path that cannot be listed is reported and skipped
 */
private static void getFilePath(File file) {
    File[] children = file.listFiles();
    if (children == null) {
        // listFiles() returns null when the path is not a directory or an I/O error occurs;
        // skip it instead of aborting the whole scan with a NullPointerException.
        System.err.println("无法读取目录:" + file);
        return;
    }
    for (File child : children) {
        if (child.isDirectory()) {
            // Descend into sub-directories.
            getFilePath(child);
        } else if (child.isFile() && child.getName().endsWith(".md")) {
            // Keep only Markdown files.
            allFilePath.add(child);
        }
    }
}

多线程批量获取 URL

这种不确定如何分配线程工作的情况就最适合使用线程池了

private static void getImageURL()throws InterruptedException  {
long time = System.currentTimeMillis();
// 使用全部文件数量当作栅栏数量
CountDownLatch countDownLatch = new CountDownLatch(allFilePath.size());
ExecutorService executorService = Executors.newFixedThreadPool(10);

for (int i = 0; i < allFilePath.size(); i++) {
File temp = allFilePath.get(i);
executorService.execute(() -> {
try (BufferedReader in = new BufferedReader(new FileReader(temp))) {
String line;
while (((line = in.readLine()) != null)) {
// 对这一行正则(注意不要匹配到空格了)
Pattern p = Pattern.compile("https://i.loli.net/\d{4}/\d{2}/\d{2}\S*");
Matcher matcher = p.matcher(line);
while (matcher.find()) {
String url = matcher.group();
url = url.substring(0, url.length() - 1);
System.out.println(url);
pictureURLPath.add(url);
}
}
} catch (IOException e) {
e.printStackTrace();
}
countDownLatch.countDown();
});
}
countDownLatch.await();
System.out.println("用时:" + (System.currentTimeMillis() - time));
executorService.shutdown();
}

下载图片

先引入依赖

<?xml version="1.0" encoding="UTF-8"?>
<!-- Maven build descriptor for the image backup script. -->
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>

<!-- Coordinates of this project. -->
<groupId>org.alsritter</groupId>
<artifactId>imageToLocal</artifactId>
<version>1.0-SNAPSHOT</version>

<dependencies>
<!-- Commons IO: supplies FileUtils, used to write the downloaded stream to a file. -->
<!-- https://mvnrepository.com/artifact/commons-io/commons-io -->
<dependency>
<groupId>commons-io</groupId>
<artifactId>commons-io</artifactId>
<version>2.8.0</version>
</dependency>

<!-- Apache HttpClient: supplies HttpClient, HttpGet and RequestConfig for the downloads. -->
<!-- https://mvnrepository.com/artifact/org.apache.httpcomponents/httpclient -->
<dependency>
<groupId>org.apache.httpcomponents</groupId>
<artifactId>httpclient</artifactId>
<version>4.5.13</version>
</dependency>
</dependencies>

</project>
/**
 * Downloads {@code url} with a GET request and stores the response body in the file
 * {@code dir + fileName}.
 *
 * <p>A response whose status is not 200 is only reported on standard output; nothing is
 * written in that case.
 *
 * @param client   HTTP client used to execute the request
 * @param url      address of the image to fetch
 * @param dir      target directory, concatenated directly with {@code fileName}
 * @param fileName name of the file to create inside {@code dir}
 * @throws RuntimeException wrapping the {@link IOException} raised while fetching or saving
 */
public static void downloadHttpUrl(HttpClient client, String url, String dir, String fileName) {
    String target = dir + fileName;

    HttpGet get = new HttpGet(url);
    // Socket and connect timeouts are both set to 50000.
    get.setConfig(RequestConfig.custom()
            .setSocketTimeout(50000)
            .setConnectTimeout(50000)
            .build());
    // Send a browser-like User-Agent header with the request.
    get.setHeader("User-Agent", "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/21.0.1180.79 Safari/537.1");

    try {
        HttpResponse resp = client.execute(get);
        int status = resp.getStatusLine().getStatusCode();
        if (status != HttpStatus.SC_OK) {
            System.out.println("下载图片失败:" + target);
            return;
        }
        InputStream body = resp.getEntity().getContent();
        FileUtils.copyInputStreamToFile(body, new File(target));
        System.out.println("下载图片成功:" + target);
    } catch (IOException e) {
        e.printStackTrace();
        throw new RuntimeException(e);
    } finally {
        get.releaseConnection();
    }
}

多线程下载图片

/**
 * Downloads every URL in {@code pictureURLPath} on a fixed pool of 10 threads. Each image is
 * stored below the current directory, in the same directory structure as the URL path that
 * follows the host.
 *
 * @throws InterruptedException if the calling thread is interrupted while waiting for the
 *                              downloads to finish
 */
public static void saveImageToLocal() throws InterruptedException {
    long time = System.currentTimeMillis();
    // Host prefix of the collected URLs; everything between it and the last '/' becomes the
    // local directory path.
    final String hostPrefix = "https://i.loli.net/";
    // Snapshot the URL list so the latch count always equals the number of submitted tasks.
    String[] urls = pictureURLPath.toArray(new String[0]);
    CountDownLatch countDownLatch = new CountDownLatch(urls.length);
    ExecutorService executorService = Executors.newFixedThreadPool(10);
    HttpClient httpClient = HttpClients.createDefault();
    try {
        for (String url : urls) {
            executorService.execute(() -> {
                try {
                    if (url.startsWith(hostPrefix)) {
                        int lastSlash = url.lastIndexOf('/');
                        String filename = url.substring(lastSlash + 1);
                        String dirPath = url.substring(hostPrefix.length(), lastSlash + 1);
                        downloadHttpUrl(httpClient, url, "./" + dirPath, filename);
                    }
                } finally {
                    // downloadHttpUrl rethrows I/O failures as RuntimeException; counting down
                    // here keeps one failed download from blocking await() forever.
                    countDownLatch.countDown();
                }
            });
        }
        countDownLatch.await();
        System.out.println("用时:" + (System.currentTimeMillis() - time));
    } finally {
        // Release the pool threads even when await() is interrupted.
        executorService.shutdown();
    }
}

完整代码

/**
 * Backs up images hosted on i.loli.net: collects the Markdown files below {@code rootPath},
 * extracts the image URLs they reference, and downloads each image into a local directory
 * tree that mirrors the URL path.
 */
public class StartDownLoad {

    // Root directory containing the Markdown notes to scan.
    static String rootPath = "C:/Users/alsritter/Desktop/New folder/";
    // Markdown files collected by getFilePath.
    static List<File> allFilePath = new LinkedList<>();
    // Worker threads add to this list concurrently, so a thread-safe List is required.
    static List<String> pictureURLPath = new CopyOnWriteArrayList<>();

    // Host prefix of the image URLs; the URL path after it is reused as the local directory.
    private static final String IMAGE_HOST = "https://i.loli.net/";

    // Compiled once and shared by all tasks (Pattern instances are immutable). The match stops
    // at whitespace or at a Markdown/HTML delimiter, so the closing ')' of "![](url)" is never
    // captured and no trailing character has to be chopped off afterwards.
    private static final Pattern IMAGE_URL =
            Pattern.compile("https://i\\.loli\\.net/\\d{4}/\\d{2}/\\d{2}/[^\\s)\"'<>]+");

    /**
     * Recursively collects every Markdown file (name ending in {@code .md}) below
     * {@code file} into {@code allFilePath}.
     *
     * @param file directory to scan; a path that cannot be listed is reported and skipped
     */
    private static void getFilePath(File file) {
        File[] children = file.listFiles();
        if (children == null) {
            // listFiles() returns null when the path is not a directory or an I/O error
            // occurs; skip it instead of aborting the scan with a NullPointerException.
            System.err.println("无法读取目录:" + file);
            return;
        }
        for (File child : children) {
            if (child.isDirectory()) {
                getFilePath(child);
            } else if (child.isFile() && child.getName().endsWith(".md")) {
                allFilePath.add(child);
            }
        }
    }

    /**
     * Scans every file in {@code allFilePath} on a fixed pool of 10 threads and appends each
     * image URL it finds to {@code pictureURLPath}.
     *
     * @throws InterruptedException if the calling thread is interrupted while waiting for the
     *                              scan tasks to finish
     */
    private static void getImageURL() throws InterruptedException {
        long time = System.currentTimeMillis();
        // Snapshot the file list so the latch count always equals the number of tasks.
        File[] files = allFilePath.toArray(new File[0]);
        CountDownLatch countDownLatch = new CountDownLatch(files.length);
        ExecutorService executorService = Executors.newFixedThreadPool(10);
        try {
            for (File temp : files) {
                executorService.execute(() -> {
                    try (BufferedReader in = new BufferedReader(new FileReader(temp))) {
                        String line;
                        while ((line = in.readLine()) != null) {
                            Matcher matcher = IMAGE_URL.matcher(line);
                            while (matcher.find()) {
                                String url = matcher.group();
                                System.out.println(url);
                                pictureURLPath.add(url);
                            }
                        }
                    } catch (IOException e) {
                        e.printStackTrace();
                    } finally {
                        // Always count down, otherwise a failing task blocks await() forever.
                        countDownLatch.countDown();
                    }
                });
            }
            countDownLatch.await();
            System.out.println("用时:" + (System.currentTimeMillis() - time));
        } finally {
            // Release the pool threads even when await() is interrupted.
            executorService.shutdown();
        }
    }

    /**
     * Downloads every URL in {@code pictureURLPath} on a fixed pool of 10 threads. Each image
     * is stored below the current directory, in the same directory structure as the URL path
     * that follows the host.
     *
     * @throws InterruptedException if the calling thread is interrupted while waiting for the
     *                              downloads to finish
     */
    public static void saveImageToLocal() throws InterruptedException {
        long time = System.currentTimeMillis();
        // Snapshot the URL list so the latch count always equals the number of tasks.
        String[] urls = pictureURLPath.toArray(new String[0]);
        CountDownLatch countDownLatch = new CountDownLatch(urls.length);
        ExecutorService executorService = Executors.newFixedThreadPool(10);
        HttpClient httpClient = HttpClients.createDefault();
        try {
            for (String url : urls) {
                executorService.execute(() -> {
                    try {
                        if (url.startsWith(IMAGE_HOST)) {
                            int lastSlash = url.lastIndexOf('/');
                            String filename = url.substring(lastSlash + 1);
                            String dirPath = url.substring(IMAGE_HOST.length(), lastSlash + 1);
                            downloadHttpUrl(httpClient, url, "./" + dirPath, filename);
                        }
                    } finally {
                        // downloadHttpUrl rethrows I/O failures as RuntimeException; counting
                        // down here keeps one failed download from blocking await() forever.
                        countDownLatch.countDown();
                    }
                });
            }
            countDownLatch.await();
            System.out.println("用时:" + (System.currentTimeMillis() - time));
        } finally {
            // Release the pool threads even when await() is interrupted.
            executorService.shutdown();
        }
    }

    /**
     * Downloads {@code url} with a GET request and stores the response body in the file
     * {@code dir + fileName}.
     *
     * <p>A response whose status is not 200 is only reported on standard output; nothing is
     * written in that case.
     *
     * @param client   HTTP client used to execute the request
     * @param url      address of the image to fetch
     * @param dir      target directory, concatenated directly with {@code fileName}
     * @param fileName name of the file to create inside {@code dir}
     * @throws RuntimeException wrapping the {@link IOException} raised while fetching or saving
     */
    public static void downloadHttpUrl(HttpClient client, String url, String dir, String fileName) {
        HttpGet request = new HttpGet(url);
        // Socket and connect timeouts are both set to 50000.
        RequestConfig requestConfig = RequestConfig.custom()
                .setSocketTimeout(50000).setConnectTimeout(50000).build();
        request.setConfig(requestConfig);

        // Send a browser-like User-Agent header with the request.
        request.setHeader("User-Agent", "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/21.0.1180.79 Safari/537.1");

        try {
            HttpResponse response = client.execute(request);
            if (HttpStatus.SC_OK == response.getStatusLine().getStatusCode()) {
                HttpEntity entity = response.getEntity();
                InputStream in = entity.getContent();
                FileUtils.copyInputStreamToFile(in, new File(dir + fileName));
                System.out.println("下载图片成功:" + dir + fileName);
            } else {
                System.out.println("下载图片失败:" + dir + fileName);
            }
        } catch (IOException e) {
            e.printStackTrace();
            throw new RuntimeException(e);
        } finally {
            request.releaseConnection();
        }
    }

    /**
     * Runs the three stages in order: collect the Markdown files, extract the image URLs,
     * download the images.
     *
     * @param args unused
     * @throws InterruptedException if the main thread is interrupted while waiting for a stage
     */
    public static void main(String[] args) throws InterruptedException {
        // Step 1: collect the paths of all Markdown files.
        File dir = new File(rootPath);
        getFilePath(dir);

        // Step 2: extract the image URLs using multiple threads.
        getImageURL();

        // Step 3: download the images into the local directory tree.
        saveImageToLocal();
    }

}